home *** CD-ROM | disk | FTP | other *** search
Text File | 1988-08-16 | 50.9 KB | 1,850 lines |
- -h- lz.h Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZ.H;72
- /*
- * Header file for all lz compression/decompression routines.
- *
- * Machine/Operating system/compiler selection: (#ifdef'ed)
- * vax Vax/Unix or Vax/VMS
- * pdp11 makes a small compressor
- * M_XENIX "large-model" Z8000
- * interdata Signed long compare is slow
- * unix Defined on true Unix systems
- * decus Decus C (no signal)
- * vms Vax/VMS (VMS_V4 may be set automatically)
- * #define readonly If the compiler doesn't support it correctly.
- *
- * Compiler configuration (#if'ed):
- * #define vax_asm TRUE/FALSE TRUE on Vax (4bsd) if the compiler supports
- * the asm() operator. Check the generated code!
- * #define UCHAR TRUE/FALSE TRUE if compiler supports unsigned char
- * #define DEBUG TRUE/FALSE TRUE to compile in debug printouts
- *
- * Algorithm Tuning parameters:
- * #define USERMEM <n> Memory available to compress.
- * If large enough, a faster algorithm is used.
- * #define SACREDMEM <n> Don't use this part of USERMEM.
- * #define BITS <n> Maximum number of code bits.
- * #define MAXIO <n> Output buffer size (squeeze memory if needed)
- */
-
- #include <stdio.h>
- #include <ctype.h>
- #include <setjmp.h>
- #ifndef decus
- # include <signal.h>
- /*
- * Arguments to signal():
- */
- extern int abort(); /* Debugging interrupt trap */
- extern int interrupt(); /* Non-debugging interrupt trap */
- extern int address_error(); /* "Segment" violation */
- #endif
-
/*
 * Boolean constants, the string terminator and two small helpers.
 *
 * streq(a, b)  is non-zero when the two strings compare equal.
 * min(a, b)    yields the smaller argument.  It is a macro, so the
 *              selected argument is evaluated twice: do not pass
 *              expressions with side effects.
 */
#ifndef TRUE
# define FALSE  0
# define TRUE   1
#endif
#ifndef EOS
# define EOS    '\0'
#endif
#define streq(a, b) (strcmp((a), (b)) == 0)
#define min(a, b)   (((a) > (b)) ? (b) : (a))
-
- /*
- * Set USERMEM to the maximum amount of physical user memory available
- * in bytes. USERMEM is used to determine the maximum BITS that can be used
- * for compression.
- *
- * SACREDMEM is the amount of physical memory saved for others; compress
- * will hog the rest.
- */
-
- #ifndef SACREDMEM
- # define SACREDMEM 0 /* Default: hold nothing back for others */
- #endif
-
- /*
- * Set machine-specific parameters
- *
- * vax_asm becomes TRUE only when both vax and unix are defined;
- * otherwise it falls through to the FALSE default, unless the
- * builder defined it beforehand.
- *
- * pdp11 presets BITS, USERMEM, UCHAR and MAXIO, so the #ifndef
- * defaults for those names further down are not applied.
- */
-
- #ifdef vax
- # ifdef unix
- # define vax_asm TRUE /* If asm() supported on vax */
- # endif
- #endif
- #ifndef vax_asm
- # define vax_asm FALSE
- #endif
-
- #ifdef pdp11
- # define BITS 12 /* max bits/code for 16-bit machine */
- # define USERMEM 0 /* Force no user memory */
- # define UCHAR FALSE /* TRUE if compiler supports unsigned char */
- # define MAXIO 512 /* Buffer size for PDP-11 I/O buffers */
- #endif
-
- /*
- * Set default values for some parameters.
- * Each default is applied only if the name is still undefined at
- * this point.  Defining interdata forces SIGNED_COMPARE_SLOW to TRUE.
- */
-
- #ifndef DEBUG
- # define DEBUG FALSE
- #endif
-
- #ifdef interdata
- # define SIGNED_COMPARE_SLOW TRUE
- #endif
- #ifndef SIGNED_COMPARE_SLOW
- # define SIGNED_COMPARE_SLOW FALSE
- #endif
-
- #ifndef USERMEM
- # define USERMEM 750000 /* default user memory */
- #endif
-
- #ifndef UCHAR
- # define UCHAR TRUE /* Compiler supports unsigned char */
- #endif
-
- #ifndef MAXIO
- # define MAXIO 2048 /* I/O buffer size */
- #endif
-
- /*
- * Set derived tuning parameters.
- *
- * PBITS is the largest code size (12 through 16) whose memory
- * threshold, increased by SACREDMEM, is still covered by USERMEM.
- * BITS defaults to PBITS when it has not been set already.
- *
- * The "#ifndef USERMEM" test directly below has no effect when
- * this header is read top to bottom, because USERMEM has already
- * been given a default earlier in the file.
- *
- * Under M_XENIX, BITS of 16 or more defines XENIX_16 and BITS of
- * 14 or 15 is lowered to 13.
- */
-
- #ifndef USERMEM
- # define USERMEM 0
- #endif
- #if USERMEM >= (433484 + SACREDMEM)
- # define PBITS 16
- #else
- # if USERMEM >= (229600 + SACREDMEM)
- # define PBITS 15
- # else
- # if USERMEM >= (127536 + SACREDMEM)
- # define PBITS 14
- # else
- # if USERMEM >= ( 73464 + SACREDMEM)
- # define PBITS 13
- # else /* Smaller systems */
- # define PBITS 12
- # endif
- # endif
- # endif
- #endif
-
- #ifndef BITS
- # define BITS PBITS
- #endif
-
- #ifdef M_XENIX
- # if BITS >= 16
- # define XENIX_16 /* Enable special vector access macros */
- # else
- # if BITS > 13
- # undef BITS
- # define BITS 13 /* Code only handles BITS = 12, 13, 16 */
- # endif
- # endif
- #endif
-
- /*
- * HSIZE is the size of the hash lookup table. It is set to
- * 1 << BITS + fudge factor, rounded up to a prime number.
- * If it is too big, the "clear the hash" routine will take
- * too long. The same numbers are replicated in the getsize()
- * routine's data table.
- *
- * Every BITS value of 12 or less shares the smallest table.
- * No HSIZE is defined by this section when BITS exceeds 16.
- */
-
- #if BITS == 16
- # define HSIZE 69001 /* 95% occupancy */
- #endif
- #if BITS == 15
- # define HSIZE 35023 /* 94% occupancy */
- #endif
- #if BITS == 14
- # define HSIZE 18013 /* 91% occupancy */
- #endif
- #if BITS == 13
- # define HSIZE 9001 /* 91% occupancy */
- #endif
- #if BITS <= 12
- # define HSIZE 5003 /* 80% occupancy */
- #endif
-
- /*
- * Type definitions -- somewhat machine specific.
- */
-
- /*
- * a code_int must be able to hold 2**BITS values of type int, and also -1
- * It is a long only when BITS is greater than 15, otherwise a plain int.
- */
- #if BITS > 15
- typedef long int code_int;
- #else
- typedef int code_int;
- #endif
-
- /*
- * A count_int must hold ((2**BITS)-1) + (255<<BITS) and -1.
- *
- * count_int's also hold counters.
- *
- * count_short's hold small counters (for the interdata)
- * Note that count_short is only declared when SIGNED_COMPARE_SLOW
- * is TRUE; in that configuration count_int is unsigned.
- *
- * Some implementations don't support unsigned char (Decus C, for example)
- * Decus C is also brain damaged with regards to unsigned shorts.
- *
- * "readonly" is defined as nothing for both decus and unix.
- */
- #if SIGNED_COMPARE_SLOW
- typedef unsigned long int count_int;
- typedef unsigned short int count_short;
- #else
- typedef long int count_int;
- #endif
-
- #if UCHAR
- typedef unsigned char char_type;
- #else
- typedef char char_type;
- #endif
-
- #ifdef decus
- typedef unsigned U_short;
- #define readonly /* Dummy out readonly modifier */
- #else
- typedef unsigned short U_short;
- #endif
-
- #ifdef unix
- #define readonly
- #endif
-
- typedef short flag; /* Boolean flag or parameter */
-
- /*
- * The following define the "magic cookie" header
- */
- #define HEAD1_MAGIC 0x1F
- #define HEAD2_MAGIC 0x9D
- #define VMS_HEAD2_MAGIC 0x9E /* vms-private output format */
-
- /*
- * Defines for third byte of header
- */
- #define BIT_MASK 0x1F /* Gets NBITS in the code */
- #define BLOCK_MASK 0x80 /* Gets block_compress flag */
- /*
- * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
- * a fourth header byte (for expansion).
- */
-
- /*
- * This is for backwards compatibility with an old version of Unix compress.
- *
- * MAXCODE(n_bits) is normally the largest value that fits in n_bits
- * bits.  The COMPATIBLE form lacks the inner parentheses, so it is
- * parsed as 1 << ((n_bits) - 1) because '-' binds tighter than '<<'.
- */
- #ifdef COMPATIBLE /* Compatible, but wrong! */
- # define MAXCODE(n_bits) (1 << (n_bits) - 1)
- #else
- # define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
- #endif
-
- #define INIT_BITS 9 /* initial number of bits/code */
-
- /*
- * One code could conceivably represent (1<<BITS) characters, but
- * to get a code of length N requires an input string of at least
- * N*(N-1)/2 characters. With 5000 chars in the stack, an input
- * file would have to contain a 25Mb string of a single character.
- * This seems unlikely.
- *
- * NOTE(review): the text above speaks of 5000 characters while
- * MAXSTACK is 8000 -- confirm which figure is intended.
- */
- #define MAXSTACK 8000 /* size of lzdcmp output stack */
-
- #ifndef CHECK_GAP
- # define CHECK_GAP 10000 /* ratio check interval */
- #endif
-
/*
 * FAIL(why) abandons the current operation by longjmp()ing to the
 * "failure" context.  When DEBUG is TRUE the reason is printed first,
 * together with the source position if the compiler provides both
 * __FILE__ and __LINE__.
 *
 * The expansion is one braced statement, so that
 *      if (error)
 *          FAIL("reason");
 * guards the message and the longjmp() alike.  A trailing semicolon at
 * the call site is harmless, but FAIL(...); must not be followed by
 * an "else".
 */
#ifndef __LINE__
# define NO__LINE__
#endif
#ifndef __FILE__
# define NO__LINE__
#endif
#if DEBUG
# define VERBOSE_DEFAULT 1
# ifndef NO__LINE__
#  define FAIL(why) {                                       \
        fprintf(stderr, "\nfatal: %s (%s at %d)\n",         \
            why, __FILE__, __LINE__);                       \
        longjmp(failure, 1);                                \
    }
# else
#  define FAIL(why) {                                       \
        fprintf(stderr, "\nfatal: %s\n", why);              \
        longjmp(failure, 1);                                \
    }
# endif
#else
# define VERBOSE_DEFAULT 0
# define FAIL(why) { longjmp(failure, 1); }
#endif
-
- /*
- * Note -- for compatibility with Unix compress,
- * NBR_CHAR and LZ_CLEAR must equal 256.
- * Also, (1 << (MIN_BITS - 1)) should equal or exceed NBR_CHAR
- *
- * The #if below deliberately contains text that is not C, so that
- * compilation stops when BITS is too small.
- * The LZ_ codes are consecutive, starting at NBR_CHAR; LZ_FIRST is
- * the first value after them.
- */
- #define NBR_CHAR 256 /* Number of input codes */
- #define MIN_BITS 9 /* Smallest code is 9 bits */
- #if ((1 << BITS) < NBR_CHAR) || (BITS < MIN_BITS)
- << Can't compile: not enough bits for the input character set size >>
- #endif
- #define LZ_CLEAR (NBR_CHAR) /* Clear code */
- #define LZ_SOH (LZ_CLEAR + 1) /* Start of header block */
- #define LZ_STX (LZ_SOH + 1) /* Start of text block */
- #define LZ_EOR (LZ_STX + 1) /* End of text record */
- #define LZ_ETX (LZ_EOR + 1) /* End of header/text block */
- #define LZ_FIRST (LZ_ETX + 1) /* First user (data) code */
-
- #ifdef vms
- #include errno
- #include ssdef
- #include stsdef
- #define IO_SUCCESS (SS$_NORMAL | STS$M_INHIB_MSG)
- #define IO_ERROR (SS$_ABORT)
- #define VMS_V4 L_cuserid >= 16 /* Enable new stuff; expands unparenthesized, meant for #if */
- #else
- #define VMS_V4 0 /* Disable new stuff */
- extern int errno;
- #ifdef decus
- #define errno $$ferr
- #endif
- #endif
-
- /*
- * Define exit() codes.
- * These generic values are used only when the vms section above
- * has not already defined IO_SUCCESS.
- */
-
- #ifndef IO_SUCCESS
- #define IO_SUCCESS 0 /* Normal exit */
- #define IO_ERROR 1 /* Error exit */
- #endif
-
- /*
- * All I/O is done by way of "streams". To establish a stream,
- * set the parameters appropriately and off you go. The following
- * functions are provided:
- * lz_fill(stream) fills the buffer from stdin
- * lz_flush(stream) writes the buffer to stdout
- * lz_eof(stream) returns EOF (for fill from memory)
- * lz_fail(stream) abort (for writing to memory).
- * lz_dummy(stream) throw an output stream away.
- * Note: if VMS_V4 is enabled and the private (non-export) format
- * chosen, lz_fill and lz_flush access the files appropriately.
- * Stream elements are initialized as follows:
- * Input: bp = NULL; bend = NULL;
- * Output: bp = bstart; bend = bstart + bsize;
- *
- * The func member is what the GET() macro calls once bp has reached
- * bend, and what the PUT() macro calls when bp >= bend before it
- * stores the next byte.
- */
-
- typedef struct STREAM {
- char_type *bp; /* Next character to get/put */
- char_type *bend; /* -> end of stream buffer */
- char_type *bstart; /* Start of stream buffer */
- short bsize; /* Stream buffer size */
- int (*func)(); /* Read/write a buffer function */
- } STREAM;
-
- /*
- * Note also that the compress routine uses putbuf(buf, count, outstream)
- * and the decompress routine uses getbuf(buf, count, instream) to (quickly)
- * transfer multiple bytes.
- *
- * GET(s) yields the next buffered byte, or the value returned by
- * calling s->func when the buffer is exhausted.  Without UCHAR the
- * buffered byte is masked with 0xFF.
- * PUT(c, s) calls s->func first if the buffer is full, then stores c
- * and advances bp; the value of the call is discarded.
- * Both macros evaluate s more than once.
- */
- #if UCHAR
- #define GET(s) \
- (((s)->bp < (s)->bend) ? *(s)->bp++ : (*(s)->func)(s))
- #else
- #define GET(s) \
- (((s)->bp < (s)->bend) ? *(s)->bp++ & 0xFF : (*(s)->func)(s))
- #endif
- #define PUT(c, s) \
- ((((s)->bp >= (s)->bend) ? (*(s)->func)(s) : 0), *(s)->bp++ = (c))
-
- extern int lz_fill(); /* Stream function: fill buffer from input */
- extern int lz_flush(); /* Stream function: write buffer to output */
- extern int lz_eof(); /* Stream function: always EOF */
- extern int lz_fail(); /* Stream function: fatal, buffer filled */
- extern int lz_dummy(); /* Stream function: discard the buffer */
-
- #if DEBUG
- extern readonly char *lz_names[]; /* "LZ_CLEAR" etc. */
- #endif
-
- /*
- * Options and globals.
- */
- #if VMS_V4
- #define ATT_NAME "vms$attributes "
- #define ATT_SIZE 15 /* strlen(ATT_NAME) */
- extern int fdl_status; /* Error code from fdl library */
- #endif
-
- extern flag binary; /* -b Readable text file if FALSE */
- extern flag noheader; /* -x3 No magic header if TRUE */
- extern flag export; /* -x (non-zero) Suppress vms private */
- extern flag block_compress; /* -x2 */
- extern flag verbose; /* -v (non-zero) Verbose logging */
- extern readonly flag is_compress; /* TRUE if compress, FALSE if decomp. */
- extern char *infilename; /* For error printouts */
- extern char *outfilename; /* For more error printouts */
- extern short n_bits; /* Current # of bits in compressed file */
- extern int firstcode; /* First value past signals */
- extern jmp_buf failure; /* For longjmp() return */
-
- -h- lzio.c Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZIO.C;15
- /*
- * l z i o . c
- *
- * I/O buffer management. All input/output I/O is done through these
- * routines (and the macros in lz.h). The rules of the game are:
- *
- * input via GET() and getbuf().
- * GET returns an 8-bit byte, or -1 on eof/error.
- * getbuf() returns the number of things gotten, or -1 on eof/error.
- * No return on error: longjmp's to the main-line.
- *
- * output via PUT() and lz_putbuf().
- * No return on error: longjmp's to the main-line.
- * flush output by lz_flush() before closing files -- or you'll lose data.
- */
-
- #include "lz.h"
- #if VMS_V4
- #include <rmsdef.h>
- #ifndef FDLSTUFF
- #define FDLSTUFF char
- #endif
- extern FDLSTUFF *fdl_input;
- extern FDLSTUFF *fdl_output;
- extern int fdl_status;
- #endif
-
- int
- lz_fill(s)
- register STREAM *s;
- {
- register int i;
- extern char *infilename;
-
- #if VMS_V4
- if (export && is_compress) {
- i = fread((char *) s->bstart, 1, s->bsize, stdin);
- if (ferror(stdin)) {
- perror(infilename);
- FAIL("export && is_compress fread error");
- }
- }
- else { /* Decompress and export/private */
- i = fdl_read(s->bstart, s->bsize, fdl_input);
- if (i < 0 && fdl_status != RMS$_EOF)
- fdl_message(fdl_input, "Read error");
- }
- #else
- #ifdef unix
- i = read(fileno(stdin), (char *) s->bstart, s->bsize);
- if (i < 0) {
- perror(infilename);
- FAIL("unix read error");
- }
- #else
- i = fread((char *) s->bstart, 1, s->bsize, stdin);
- if (ferror(stdin)) {
- perror(infilename);
- exit(IO_ERROR);
- }
- #endif
- #endif
- if (i <= 0)
- return (EOF);
- else {
- s->bp = s->bstart;
- s->bend = &s->bstart[i];
- #if UCHAR
- return (*s->bp++);
- #else
- return (*s->bp++ & 0xFF);
- #endif
- }
- }
-
- lz_flush(s)
- register STREAM *s;
- {
- register int count;
- extern char *outfilename;
-
- count = s->bp - s->bstart;
- #if DEBUG
- if (!is_compress && verbose > 4) {
- fprintf(stderr, "lz_flush %d: ", count);
- dumptext(s->bstart, count, stderr);
- }
- #endif
- #if VMS_V4
- if (export) {
- if (is_compress)
- fwrite((char *) s->bstart, count, 1, stdout);
- else {
- register char *bp, *bend;
-
- for (bp = s->bstart, bend = bp + count; bp < bend; bp++)
- putchar(*bp);
- }
- if (ferror(stdout)) {
- perror(outfilename);
- FAIL("VMS V4 fwrite/putchar error");
- }
- }
- else {
- if (fdl_write((char *) s->bstart, count, fdl_output) == -1) {
- fdl_message(fdl_output, "Write error");
- FAIL("VMS V4 fdl_write error");
- }
- }
- #else
- #ifdef unix
- if (write(fileno(stdout), (char *) s->bstart, count) != count) {
- perror(outfilename);
- fprintf(stderr, "Can't write to \"%s\"\n", outfilename);
- FAIL("Unix write error");
- }
- #else
- fwrite((char *) s->bstart, 1, count, stdout);
- if (ferror(stdout)) {
- perror(outfilename);
- FAIL("Other (decus) fwrite error");
- }
- #endif
- #endif
- s->bp = s->bstart;
- }
-
- int
- lz_getbuf(buffer, count, s)
- char_type *buffer;
- int count;
- register STREAM *s;
- /*
- * Read a block of data -- be clever. Return number gotten, or -1
- * on eof.
- */
- {
- register char_type *bp; /* -> buffer */
- register char_type *ip; /* -> I/O buffer */
- register char_type *ep; /* End of segment */
- register int remaining; /* Size of segment */
- int datum;
-
- if (count == 0) /* Shouldn't happen */
- return (0);
- bp = buffer;
- while (--count >= 0) {
- if ((datum = GET(s)) == EOF) /* Maybe fill LZ buff */
- break;
- *bp++ = datum;
- remaining = s->bend - (ip = s->bp);
- if (remaining > count)
- remaining = count;
- ep = &ip[remaining];
- while (ip < ep)
- *bp++ = *ip++;
- count -= remaining;
- s->bp = ip; /* Refresh buffer */
- }
- return ((bp == buffer) ? -1 : bp - buffer);
- }
-
- int
- lz_putbuf(bp, count, s)
- register char_type *bp;
- int count;
- register STREAM *s;
- /*
- * Write a block of data -- be clever.
- */
- {
- register char_type *op; /* -> I/O buffer */
- register char_type *ep; /* End of segment */
- register int remaining; /* Size of segment */
-
- while (--count >= 0) {
- PUT(*bp++, s); /* Forces a buffer */
- remaining = s->bend - (op = s->bp);
- if (remaining > count)
- remaining = count;
- ep = &op[remaining];
- while (op < ep)
- *op++ = *bp++;
- count -= remaining;
- s->bp = op; /* Refresh buffer */
- }
- }
-
- int
- lz_eof(s)
- STREAM *s;
- /*
- * Dummy routine for read from memory -- returns EOF.
- */
- {
- return (s, EOF);
- }
-
- int
- lz_fail(s)
- STREAM *s;
- /*
- * Dummy routine for write to memory -- called if buffer fills.
- */
- {
- fprintf(stderr, "Memory buffer [%d bytes] filled -- fatal.\n",
- s->bsize);
- FAIL("lz_fail crash");
- }
-
- int
- lz_dummy(s)
- STREAM *s;
- /*
- * Dummy routine for write to memory -- writes to the bit-bucket.
- */
- {
- s->bp = s->bstart;
- }
-
- #ifndef decus
- /*
- * Signal error handlers.
- */
- #ifdef vms
- #define unlink delete
- #endif
-
- interrupt()
- {
- if (outfilename != NULL && !streq(outfilename, "<stdout>"))
- unlink(outfilename);
- exit(IO_ERROR);
- }
-
- address_error()
- {
- if (!is_compress)
- fprintf(stderr, "Decompress: corrupt input file\n");
- interrupt();
- }
- #endif
-
- /*
- * getredirection() is intended to aid in porting C programs
- * to VMS (Vax-11 C) which does not support '>' and '<'
- * I/O redirection. With suitable modification, it may
- * be useful for other portability problems as well.
- */
-
- #ifdef vms
-
- int
- getredirection(argc, argv)
- int argc;
- char **argv;
- /*
- * Process vms redirection arg's. Exit if any error is seen.
- * If getredirection() processes an argument, it is erased
- * from the vector. getredirection() returns a new argc value.
- *
- * Warning: do not try to simplify the code for vms. The code
- * presupposes that getredirection() is called before any data is
- * read from stdin or written to stdout.
- *
- * Normal usage is as follows:
- *
- * main(argc, argv)
- * int argc;
- * char *argv[];
- * {
- * argc = getredirection(argc, argv);
- * }
- */
- {
- register char *ap; /* Argument pointer */
- int i; /* argv[] index */
- int j; /* Output index */
- int file; /* File_descriptor */
-
- for (j = i = 1; i < argc; i++) { /* Do all arguments */
- switch (*(ap = argv[i])) {
- case '<': /* <file */
- if (freopen(++ap, "r", stdin) == NULL) {
- perror(ap); /* Can't find file */
- exit(IO_ERROR); /* Is a fatal error */
- }
- break;
-
- case '>': /* >file or >>file */
- if (*++ap == '>') { /* >>file */
- /*
- * If the file exists, and is writable by us,
- * call freopen to append to the file (using the
- * file's current attributes). Otherwise, create
- * a new file with "vanilla" attributes as if
- * the argument was given as ">filename".
- * access(name, 2) is TRUE if we can write on
- * the specified file.
- */
- if (access(++ap, 2) == 0) {
- if (freopen(ap, "a", stdout) != NULL)
- break; /* Exit case statement */
- perror(ap); /* Error, can't append */
- exit(IO_ERROR); /* After access test */
- } /* If file accessable */
- }
- /*
- * On vms, we want to create the file using "standard"
- * record attributes. create(...) creates the file
- * using the caller's default protection mask and
- * "variable length, implied carriage return"
- * attributes. dup2() associates the file with stdout.
- */
- if ((file = creat(ap, 0, "rat=cr", "rfm=var")) == -1
- || dup2(file, fileno(stdout)) == -1) {
- perror(ap); /* Can't create file */
- exit(IO_ERROR); /* is a fatal error */
- } /* If '>' creation */
- break; /* Exit case test */
-
- default:
- argv[j++] = ap; /* Not a redirector */
- break; /* Exit case test */
- }
- } /* For all arguments */
- argv[j] = NULL; /* Terminate argv[] */
- return (j); /* Return new argc */
- }
- #endif
-
- #if 1 || DEBUG
-
- int col;
-
- readonly char *lz_names[] = {
- "LZ_CLEAR", "LZ_SOH", "LZ_STX", "LZ_EOR", "LZ_ETX", "???"
- };
-
- dumphex(buffer, count, fd)
- register char_type *buffer;
- register int count;
- FILE *fd;
- {
- if (col > 0) {
- putc('\n', fd);
- col = 0;
- }
- fprintf(fd, "%2d:", count);
- while (--count >= 0) {
- fprintf(fd, " %02x", *buffer++ & 0xFF);
- }
- fprintf(fd, "\n");
- }
-
- dumptext(buffer, count, fd)
- register char_type *buffer;
- int count;
- FILE *fd;
- {
- extern char *dumpchar();
-
- putc('"', fd);
- while (--count >= 0)
- fputs(dumpchar((int) *buffer++), fd);
- fputs("\"\n", fd);
- }
-
char *
dumpchar(c)
register int    c;
/*
 * Return a printable rendering of the low eight bits of c:
 * printable ASCII as itself, newline/tab/backspace/formfeed/return
 * as a C escape, anything else as "<xHH>".
 * The result is either a string constant or a static buffer that the
 * next call overwrites.
 */
{
    static char     text[8];

    c &= 0xFF;
    if (c < 0x80 && isprint(c)) {       /* Plain printable ASCII    */
        text[0] = c;
        text[1] = '\0';
        return (text);
    }
    if (c == '\n')
        return ("\\n");
    if (c == '\t')
        return ("\\t");
    if (c == '\b')
        return ("\\b");
    if (c == '\f')
        return ("\\f");
    if (c == '\r')
        return ("\\r");
    sprintf(text, "<x%02X>", c);
    return (text);
}
- #endif
-
- /*
- * Cputime returns the elapsed process time (where available) in msec.
- * Note: Unix doesn't seem to have a good way to determine ticks/sec.
- */
-
- #ifdef decus
- #include <timeb.h>
-
- long
- cputime()
- {
- struct timeb buf;
- static struct timeb origin;
- long result;
- int msec;
-
- if (origin.time == 0)
- ftime(&origin);
- ftime(&buf);
- result = (buf.time - origin.time) * 1000;
- msec = ((int) buf.msec) - ((int) origin.msec);
- return (result + ((long) msec));
- }
- #else
- #ifdef vms
- #include <types.h>
- struct tms {
- time_t tms_utime; /* added to tms_stime by cputime() */
- time_t tms_stime; /* added to tms_utime by cputime() */
- time_t tms_uchild; /* forgot the */
- time_t tms_uchildsys; /* real names */
- };
- #define HERTZ 100.0 /* 10 msec units */
- #else
- #include <sys/types.h>
- #include <sys/times.h>
- #ifndef HERTZ
- #define HERTZ 60.0 /* Change for Europe; cputime() divides by this */
- #endif
- #endif
-
- long
- cputime()
- {
- struct tms tms;
- double temp;
- long result;
-
- times(&tms);
- result = tms.tms_utime + tms.tms_stime;
- temp = result * 1000.0 / HERTZ; /* Time in msec. */
- result = temp;
- return (result);
- }
- #endif
-
- -h- lzvio.c Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZVIO.C;3
- /*
- * l z v i o . c
- * For VMS V4 only.
- */
-
- /*
- * Problems:
- * If you open a second input file (getting rms attributes)
- * it aborts with an internal "fatal" error (15820C LIB-F-FATERRLIB)
- */
-
- /*
- * Make TESTING_FDLIO non-zero to enable test code.
- *
- * Edit History
- */
- #ifndef TESTING_FDLIO
- #define TESTING_FDLIO 0 /* Default: test code is not compiled */
- #endif
-
- /*
- * RMS/FDL record level i/o routines for Vax-11 C V4 or greater only.
- * Rather crude.
- *
- * The following are provided:
- *
- * #define FDLSTUFF char
- * #include descrip
- *
- * FDLSTUFF *
- * fdl_open(filename, fdl_descriptor)
- * char *filename;
- * struct dsc$descriptor *fdl_descriptor;
- * Initializes internal buffers and opens this existing
- * file for input. The filename may not contain wildcards.
- * On (successful) return, fdl_descriptor will point to
- * an initialized fdl specification. The description
- * string will be in malloc'ed memory. The caller does not
- * initialize the fdl_descriptor. Returns NULL on error.
- * (Note an error will be returned if the file is not
- * block-oriented.)
- *
- * When you don't need the fdl_descriptor information
- * any more, free it by calling
- * fdl_free(fdl_descriptor);
- * if fdl_descriptor is NULL on entry, the file is opened
- * normally (fdl information is not collected).
- *
- * FDLSTUFF *
- * fdl_create(fdl_descriptor, override_filename)
- * struct dsc$descriptor *fdl_descriptor;
- * char *override_filename;
- * Creates a file using the fdl specification.
- * If override_filename is not NULL and not equal to "",
- * it will override the filename specified in the fdl.
- * fdl_write() is used to write data to the file.
- * Returns NULL on error.
- *
- * if fdl_descriptor is NULL, the file is created using
- * the name in override_filename (which must be present).
- * The file is created in "undefined" record format.
- *
- * fdl_free(fdl_descriptor)
- * struct dsc$descriptor *fdl_descriptor;
- * Releases the fdl descriptor block.
- *
- * int
- * fdl_read(buffer, buffer_length, r)
- * char *buffer;
- * int buffer_length;
- * FDLSTUFF *r;
- * Read buffer_length bytes from the file (using SYS$READ).
- * No expansion or interpretation. buffer_length had
- * better be even or you're asking for trouble. Returns
- * the actual number of bytes read. The file has been
- * opened by fdl_open.
- *
- * int
- * fdl_write(buffer, buffer_length, r)
- * char *buffer;
- * int buffer_length;
- * FDLSTUFF *r;
- * Write buffer_length bytes to the file (using SYS$WRITE).
- * No expansion or interpretation. buffer_length had
- * better be even or you're asking for trouble. Returns
- * the actual number of bytes written. The file was opened
- * by fdl_create();
- *
- * fdl_getname(r, buffer)
- * FDLSTUFF *r;
- * char *buffer;
- * Copies the currently open file's name to the caller's
- * data buffer buffer.
- *
- * long
- * fdl_fsize(r)
- * Returns the size in bytes of the opened file.
- *
- * fdl_dump(fdl_descriptor, fd)
- * struct dsc$descriptor *fdl_descriptor;
- * FILE *fd;
- * Writes the fdl info to the indicated file with
- * line breaks in appropriate places.
- *
- * fdl_message(r, why)
- * FDLSTUFF *r;
- * char *why;
- * All system-level routines set a global value, fdl_status.
- * fdl_message() prints the error message text corresponding
- * to the current value of fdl_status. The message printed
- * has the format:
- * why current_filename: error_message.
- * If why is NULL, only the error_message is printed.
- */
-
- #include "lz.h"
- #if VMS_V4
- #include rms
- #include ssdef
- #include descrip
- #include devdef
/*
 * FDL flag bits, supplied here in case the system headers did not
 * define them.  SIGNAL_ON_ERROR is the signal bit when TESTING_FDLIO
 * is enabled and zero otherwise.
 */
#ifndef FDL$M_FDL_SIGNAL
#define FDL$M_FDL_SIGNAL 1      /* Signal errors if set         */
#endif
#ifndef FDL$M_FDL_STRING
#define FDL$M_FDL_STRING 2      /* Use string for fdl text      */
#endif
#if TESTING_FDLIO
#define SIGNAL_ON_ERROR FDL$M_FDL_SIGNAL
#else
#define SIGNAL_ON_ERROR 0
#endif

/*
 * lz.h normally supplies these already (EOS as '\0'); define them
 * only when missing so that no macro is redefined with a different
 * spelling.
 */
#ifndef TRUE
#define TRUE    1
#endif
#ifndef FALSE
#define FALSE   0
#endif
#ifndef EOS
#define EOS     0
#endif
-
- typedef struct FDLSTUFF {
- struct RAB rab; /* Record access buffer (first member) */
- struct FAB fab; /* File access buffer */
- struct NAM nam; /* File name buffer */
- struct XABFHC xab; /* Extended attributes block; read by fdl_fsize() */
- char starname[NAM$C_MAXRSS + 1]; /* Wild file name; fdl_setup() uses it as the expanded-name buffer */
- char filename[NAM$C_MAXRSS + 1]; /* Open file name */
- } FDLSTUFF;
-
- int fdl_status; /* Set to last rms call status */
-
- static FDLSTUFF *
- fail(r, why, name)
- FDLSTUFF *r; /* Buffer */
- char *why; /* A little commentary */
- char *name; /* Argument to perror */
- /*
- * Problem exit routine
- */
- {
- #if TESTING_FDLIO
- if (name == NULL && r != NULL)
- name = r->fab.fab$l_fna;
- message(r, why, name);
- #endif
- if (r != NULL)
- free(r);
- return (NULL);
- }
-
- FDLSTUFF *
- fdl_open(filename, fdl_descriptor)
- char *filename; /* What to open */
- struct dsc$descriptor *fdl_descriptor; /* Result descriptor */
- /*
- * Open the file. Returns NULL on failure, else a pointer to RMS stuff.
- * Which is equivalently a pointer to the RAB. (Note that the RAB points
- * in turn to the FAB.)
- *
- * Return the file's fdl descriptor in the user-supplied (uninitialized)
- * descriptor.
- */
- {
- register FDLSTUFF *r;
- int retlen;
- int badblk;
- struct FAB *fab_add;
- struct RAB *rab_add;
- static int flags = (FDL$M_FDL_STRING | SIGNAL_ON_ERROR);
- extern FDLSTUFF *fdl_setup();
-
- if ((r = fdl_setup(filename)) == NULL)
- return (NULL);
- /*
- * Now open the file.
- */
- r->fab.fab$b_fac = FAB$M_GET | FAB$M_BIO; /* Block I/O only */
- if ((fdl_status = sys$open(&r->fab)) != RMS$_NORMAL) {
- return (fail(r, "opening file", NULL));
- }
- if ((r->fab.fab$l_dev & DEV$M_REC) != 0) {
- fail(r, "Record only device");
- fdl_close(r);
- return (NULL);
- }
- r->rab.rab$l_rop = RAB$M_BIO; /* Block I/O only */
- if ((fdl_status = sys$connect(&r->rab)) != RMS$_NORMAL)
- return (fail(r, "connecting after open", NULL));
- if (fdl_descriptor != NULL) {
- /*
- * Now, get the file attributes
- */
- fdl_descriptor->dsc$w_length = 4096;
- fdl_descriptor->dsc$b_dtype = DSC$K_DTYPE_VT;
- fdl_descriptor->dsc$b_class = DSC$K_CLASS_D;
- fdl_descriptor->dsc$a_pointer = malloc(4096);
- fab_add = &r->fab;
- rab_add = &r->rab;
- if ((fdl_status = fdl$generate(
- &flags,
- &fab_add,
- &rab_add,
- 0, 0,
- fdl_descriptor,
- &badblk,
- &retlen)) != SS$_NORMAL) {
- fdl_free(fdl_descriptor);
- sys$close(&r->fab);
- return(fail(r, "getting fdl info", NULL));
- }
- /*
- * Success, null-terminate fdl info and squeeze the block.
- */
- fdl_descriptor->dsc$a_pointer[retlen] = EOS;
- fdl_descriptor->dsc$a_pointer
- = realloc(fdl_descriptor->dsc$a_pointer, retlen + 1);
- fdl_descriptor->dsc$w_length = retlen;
- }
- return (r);
- }
-
- FDLSTUFF *
- fdl_create(fdl_descriptor, override_filename)
- struct dsc$descriptor *fdl_descriptor; /* Result descriptor */
- char *override_filename; /* What to open */
- /*
- * Create the file, Returns NULL on failure, else a pointer to RMS stuff.
- * Which is equivalently a pointer to the RAB. (Note that the RAB points
- * in turn to the FAB.) The file is open for writing using fdl_write.
- *
- * Uses the filename in the descriptor block, or the override filename
- * if supplied (non-NULL and not == "");
- *
- * If fdl_descriptor is NULL, the override_filename is opened normally.
- */
- {
- register FDLSTUFF *r;
- int retlen;
- int badblk;
- static int flags = (FDL$M_FDL_STRING | SIGNAL_ON_ERROR);
- struct dsc$descriptor newname;
- struct dsc$descriptor *newname_ptr;
- int fid_block[3];
- char created_name[NAM$C_MAXRSS + 1];
- struct dsc$descriptor created_name_des = {
- NAM$C_MAXRSS,
- DSC$K_DTYPE_T,
- DSC$K_CLASS_S,
- &created_name[0]
- };
- extern FDLSTUFF *fdl_setup();
-
- if (fdl_descriptor == NULL) {
- if ((r = fdl_setup(override_filename)) == NULL)
- return (NULL);
- r->fab.fab$b_fac = FAB$M_PUT | FAB$M_BIO; /* Block I/O only */
- r->fab.fab$l_fop |= (FAB$M_NAM | FAB$M_SQO | FAB$M_BIO);
- r->fab.fab$b_org = FAB$C_SEQ; /* Sequential only */
- r->fab.fab$b_rfm = FAB$C_UDF; /* Undefined format */
- if ((fdl_status = sys$create(&r->fab)) & 01 == 0)
- return (fail(r, "creating (sys$create)"));
- goto exit;
- }
- if (override_filename == NULL || override_filename[0] == '\0')
- newname_ptr = NULL;
- else {
- newname_ptr = &newname;
- newname.dsc$w_length = strlen(override_filename);
- newname.dsc$b_dtype = DSC$K_DTYPE_T;
- newname.dsc$b_class = DSC$K_CLASS_S;
- newname.dsc$a_pointer = override_filename;
- }
- if ((fdl_status = fdl$create(fdl_descriptor,
- newname_ptr, /* New file name if any */
- 0, /* Default filename */
- &created_name_des, /* Resultant filename */
- &fid_block[0], /* File ID block */
- &flags, /* FDL flag bits */
- 0, /* Statement number */
- &retlen, /* Created name length */
- 0, 0) /* Create status, stv */
- ) & 01 == 0) {
- return(fail(NULL, "creating (fdl$create)", NULL));
- }
- created_name[retlen] = '\0';
- if ((r = fdl_setup(created_name)) == NULL)
- return (NULL);
- /*
- * Now, open the file for output.
- */
- r->fab.fab$b_fac = FAB$M_PUT | FAB$M_BIO; /* Block I/O only */
- if ((fdl_status = sys$open(&r->fab)) != RMS$_NORMAL) {
- return (fail(r, "opening created file", NULL));
- }
- exit: if ((r->fab.fab$l_dev & DEV$M_REC) != 0) {
- fail(r, "Record only device");
- fdl_close(r);
- return (NULL);
- }
- r->rab.rab$l_rop = RAB$M_BIO; /* Block I/O only */
- if ((fdl_status = sys$connect(&r->rab)) != RMS$_NORMAL)
- return (fail(r, "connecting after create", NULL));
- return (r);
- }
-
- static FDLSTUFF *
- fdl_setup(filename)
- char *filename;
- /*
- * Initializes rms blocks and parses file name. Returns the
- * FDL data block on success, NULL on error.
- */
- {
- register FDLSTUFF *r;
-
- if ((r = (char *)malloc(sizeof (FDLSTUFF))) == NULL)
- return (NULL);
- r->fab = cc$rms_fab; /* Preset fab, */
- r->nam = cc$rms_nam; /* name block */
- r->rab = cc$rms_rab; /* and record block */
- r->xab = cc$rms_xabfhc; /* file header block */
- r->fab.fab$l_nam = &r->nam; /* fab -> name block */
- r->fab.fab$l_xab = &r->xab; /* fab -> file header */
- r->fab.fab$l_fna = filename; /* Argument filename */
- r->fab.fab$b_fns = strlen(filename); /* ... size */
- r->rab.rab$l_fab = &r->fab; /* rab -> fab */
- /* Stuff the name block */
- r->nam.nam$l_esa = r->starname; /* Expanded filename */
- r->nam.nam$b_ess = NAM$C_MAXRSS + 1; /* ... size */
- r->nam.nam$b_rss = NAM$C_MAXRSS + 1; /* ... max size */
- if ((fdl_status = sys$parse(&r->fab)) != RMS$_NORMAL) {
- return (fail(r, "parsing", filename));
- }
- ((char *)r->nam.nam$l_esa)[r->nam.nam$b_esl] = EOS;
- r->fab.fab$l_fna = r->nam.nam$l_esa; /* File name */
- r->fab.fab$b_fns = r->nam.nam$b_esl; /* Length */
- r->fab.fab$l_fop |= FAB$M_NAM; /* Use name block */
- return (r);
- }
-
- fdl_free(fdl_descriptor)
- struct dsc$descriptor *fdl_descriptor;
- /*
- * Release the descriptor
- */
- {
- if (fdl_descriptor->dsc$a_pointer != NULL) {
- free(fdl_descriptor->dsc$a_pointer);
- fdl_descriptor->dsc$a_pointer = NULL;
- }
- }
-
- fdl_close(r)
- register FDLSTUFF *r;
- {
- if ((fdl_status = sys$close(&r->fab)) != RMS$_NORMAL)
- return(fail(r, "close", NULL));
- free(r);
- }
-
- int
- fdl_read(buffer, buffer_length, r)
- char *buffer;			/* Record		*/
- int buffer_length;		/* Record length	*/
- register FDLSTUFF *r;		/* Record info.		*/
- /*
-  * Read the next record from the file into buffer using sys$read().
-  * Returns the number of bytes read (rab$w_rsz), or -1 if sys$read()
-  * returns anything other than RMS$_NORMAL; fdl_status has the status.
-  * When built with TESTING_FDLIO, failures other than end of file
-  * are reported through fdl_message().
-  */
- {
-     r->rab.rab$l_ubf = buffer;
-     r->rab.rab$w_usz = buffer_length;
-     r->rab.rab$l_bkt = 0;
-     fdl_status = sys$read(&r->rab);
-     if (fdl_status == RMS$_NORMAL)
-         return (r->rab.rab$w_rsz);
- #if TESTING_FDLIO
-     if (fdl_status != RMS$_EOF) {
-         fdl_message(r, "error return from sys$read");
-         sleep(1);
-     }
- #endif
-     return (-1);
- }
-
- int
- fdl_write(buffer, buffer_length, r)
- char *buffer;			/* Record		*/
- int buffer_length;		/* Record length	*/
- register FDLSTUFF *r;		/* Record info.		*/
- /*
-  * Write the next record to the file from buffer using sys$write().
-  * Returns the number of bytes written (rab$w_rsz), or -1 if
-  * sys$write() returns anything other than RMS$_NORMAL; fdl_status
-  * has the status.  When built with TESTING_FDLIO, failures are
-  * reported through fdl_message().
-  */
- {
-     r->rab.rab$l_rbf = buffer;
-     r->rab.rab$w_rsz = buffer_length;
-     r->rab.rab$l_bkt = 0;
-     fdl_status = sys$write(&r->rab);
-     if (fdl_status == RMS$_NORMAL)
-         return (r->rab.rab$w_rsz);
- #if TESTING_FDLIO
-     fdl_message(r, "error return from sys$write");
-     sleep(1);
- #endif
-     return (-1);
- }
-
- fdl_getname(r, buffer)
- FDLSTUFF *r;			/* File pointer		*/
- char *buffer;			/* Where to put it	*/
- /*
-  * Copy the file name currently recorded in the fab (fab$l_fna) into
-  * the caller's buffer and return the buffer.  The caller must make
-  * the buffer large enough; no length check is done.
-  */
- {
-     char *current_name;
-
-     current_name = r->fab.fab$l_fna;
-     strcpy(buffer, current_name);
-     return (buffer);
- }
-
- long
- fdl_fsize(r)
- FDLSTUFF *r;			/* File pointer		*/
- /*
-  * Return the current file size in bytes, taken from the file header
-  * XAB.  xab$l_ebk is the virtual block number that contains the end
-  * of file (block numbers start at 1) and xab$w_ffb is the first free
-  * byte within that block, so the blocks that are completely full
-  * number ebk - 1.  An ebk of zero is treated as "no full blocks".
-  */
- {
-     long full_blocks;
-
-     full_blocks = (long) r->xab.xab$l_ebk;
-     if (full_blocks > 0)
-         full_blocks--;			/* EOF block is partial	*/
-     return ((full_blocks * 512) + r->xab.xab$w_ffb);
- }
-
- static char *vms_etext();	/* Defined later in this file */
-
- fdl_message(r, why)
- FDLSTUFF *r;
- char *why;
- /*
-  * Print an error message for the status in fdl_status on stderr.
-  * If why is NULL only the system message text is printed; otherwise
-  * the output is why, a blank (unless why is empty), the file name
-  * from r's fab (nothing if r is NULL), a colon and the message text.
-  *
-  * vms_etext() is declared at file scope with internal linkage so the
-  * declaration agrees with its static definition.
-  */
- {
-     if (why == NULL) {
-         fprintf(stderr, "\n%s\n\n", vms_etext(fdl_status));
-     }
-     else {
-         fprintf(stderr, "\n%s%s%s: %s\n\n",
-             why,
-             (why[0] == EOS) ? "" : " ",
-             (r == NULL) ? "" : r->fab.fab$l_fna,
-             vms_etext(fdl_status));
-     }
- }
-
- static char errname[257];		/* Error text stored here	*/
- static $DESCRIPTOR(err, errname);	/* descriptor for error text	*/
-
- static char *
- vms_etext(errorcode)
- int errorcode;
- /*
-  * Translate a VMS status code into message text with
-  * lib$sys_getmsg().  The text is built in the static errname[]
-  * buffer, trailing non-graphic characters are trimmed, and a pointer
-  * to errname is returned, so each call overwrites the previous
-  * result.  If the message cannot be fetched an empty string is
-  * returned.
-  */
- {
-     register char *bp;
-     short errlen;			/* Actual text length	*/
-     long flags;				/* Message parts wanted	*/
-
-     errlen = 0;
-     /*
-      * The flags are passed by reference, so they need a real
-      * variable.  15 sets the low four bits: text, message
-      * identifier, severity and facility.
-      */
-     flags = 15;
-     /* VMS status values have the low bit set on success. */
-     if ((lib$sys_getmsg(&errorcode, &errlen, &err, &flags) & 1) == 0)
-         errlen = 0;
-     /*
-      * Trim trailing junk, never stepping in front of errname.
-      */
-     bp = &errname[errlen];
-     while (bp > errname && !isgraph(bp[-1]))
-         bp--;
-     *bp = EOS;
-     return (errname);
- }
-
- static
- message(r, why, name)
- FDLSTUFF *r;			/* Buffer (not used here)	*/
- char *why;			/* A little commentary		*/
- char *name;			/* File name, may be NULL	*/
- /*
-  * Print the RMS status in fdl_status on stderr: first the value in
-  * hex with the commentary and file name, then the system's message
-  * text in quotes.
-  */
- {
-     char *shown_name;
-
-     shown_name = (name != NULL) ? name : "";
-     fprintf(stderr, "\nRMS error %x when %s %s\n",
-         fdl_status, why, shown_name);
-     fprintf(stderr, "\"%s\"\n", vms_etext(fdl_status));
- }
-
- fdl_dump(fdl_descriptor, fd)
- struct dsc$descriptor *fdl_descriptor;
- FILE *fd;
- /*
-  * Dump the descriptor's text to fd, starting a new line after each
-  * ';'.  Text between double quotes is copied unchanged, so a ';'
-  * inside a quoted string does not break the line.  Scanning never
-  * goes beyond dsc$w_length bytes, even if a quote is left open.
-  */
- {
-     register char *tp, *end;
-
-     tp = fdl_descriptor->dsc$a_pointer;
-     end = tp + fdl_descriptor->dsc$w_length;
-     while (tp < end) {
-         if (*tp == '"') {
-             /* Copy the opening quote and the quoted text. */
-             do {
-                 putc(*tp++, fd);
-             } while (tp < end && *tp != '"');
-             if (tp >= end)
-                 break;		/* Unterminated quote	*/
-         }
-         putc(*tp, fd);
-         if (*tp++ == ';')
-             putc('\n', fd);
-     }
- }
-
-
- #if TESTING_FDLIO
- /*
- * Test program for rms io
- */
- #include <stdio.h>
-
- char line[133];
- char filename[133];
- char buffer[2048];
-
- static char *
- read_line(text, size)
- char *text;			/* Where to put the line	*/
- int size;			/* Size of text[]		*/
- /*
-  * Read one line from stdin into text without overrunning it and
-  * drop the trailing newline.  Returns text, or NULL at end of file.
-  */
- {
-     register char *cp;
-
-     if (fgets(text, size, stdin) == NULL)
-         return (NULL);
-     for (cp = text; *cp != EOS; cp++) {
-         if (*cp == '\n') {
-             *cp = EOS;
-             break;
-         }
-     }
-     return (text);
- }
-
- main(argc, argv)
- int argc;
- char *argv[];
- /*
-  * Interactive test: repeatedly ask for an old and a new file name,
-  * open the old file, create the new one from the old file's FDL
-  * information, dump that information and copy the records across.
-  * Ends at end of file on stdin.  The old file and the FDL
-  * descriptor are released on every path that abandons a copy.
-  */
- {
-     FDLSTUFF *old;
-     FDLSTUFF *new;
-     int size, total, nrecords;
-     struct dsc$descriptor fdl_info;	/* Result descriptor	*/
-
-     for (;;) {
-         fprintf(stderr, "Old file name: ");
-         fflush(stderr);
-         if (read_line(line, sizeof line) == NULL)
-             break;
-         if (line[0] == EOS)
-             continue;
-         if ((old = fdl_open(line, &fdl_info)) == NULL) {
-             fprintf(stderr, "open failed\n");
-             continue;
-         }
-         fprintf(stderr, "New file name: ");
-         fflush(stderr);
-         if (read_line(line, sizeof line) == NULL) {
-             fdl_close(old);
-             fdl_free(&fdl_info);
-             break;
-         }
-         if ((new = fdl_create(&fdl_info, line)) == NULL) {
-             fprintf(stderr, "create failed\n");
-             fdl_close(old);
-             fdl_free(&fdl_info);
-             continue;
-         }
-         fdl_getname(old, buffer);
-         fprintf(stderr, "Fdl for \"%s\", size %ld\n",
-             buffer, fdl_fsize(old));
-         fdl_dump(&fdl_info, stderr);
-         total = nrecords = 0;
-         while ((size = fdl_read(buffer, sizeof buffer, old)) > 0) {
-             if (fdl_write(buffer, size, new) < 0) {
-                 fprintf(stderr, "write failed\n");
-                 break;
-             }
-             nrecords++;
-             total += size;
-         }
-         fdl_close(old);
-         fdl_close(new);
-         fprintf(stderr, "copied %d records, %d bytes total\n",
-             nrecords, total);
-         fdl_free(&fdl_info);
-     }
- }
-
- #endif
- #endif
-
- -h- makefile.txt Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]MAKEFILE.TXT;6
- # Unix makefile for lzcomp, lzdcmp
- #
- # The redefinition of strchr() and strrchr() are needed for
- # Ultrix-32, Unix 4.2 bsd (and maybe some other Unices).
- # The variable name must match the one used in CFLAGS below.
- # On systems whose C library already has strchr() and strrchr(),
- # build with "make BSDDEFINES=" to turn the redefinition off.
- #
- BSDDEFINES = -Dstrchr=index -Dstrrchr=rindex
- #
- # On certain systems, such as Unix System III, you may need to define
- # $(LINTFLAGS) in the make command line to set system-specific lint flags.
- #
-
- CFLAGS = -O $(BSDDEFINES)
-
- all : lzcomp lzdcmp
-
- #
- # ** compile lzcomp
- #
- LZCOMP_SRCS = lzcmp1.c lzcmp2.c lzcmp3.c lzio.c
- LZCOMP_OBJS = lzcmp1.o lzcmp2.o lzcmp3.o lzio.o
- lzcomp: $(LZCOMP_OBJS)
- $(CC) $(CFLAGS) $(LZCOMP_OBJS) -o lzcomp
-
- #
- # ** compile lzdcmp
- #
- LZDCMP_SRCS = lzdcm1.c lzdcm2.c lzdcm3.c lzio.c
- LZDCMP_OBJS = lzdcm1.o lzdcm2.o lzdcm3.o lzio.o
- lzdcmp: $(LZDCMP_OBJS)
- $(CC) $(CFLAGS) $(LZDCMP_OBJS) -o lzdcmp
-
- #
- # ** Lint the code
- # ** $(LINTFLAGS) and $(DEFINES) are not set in this makefile;
- # ** give them on the make command line if they are needed.
- #
- lint: $(LZCOMP_SRCS) $(LZDCMP_SRCS)
- lint $(LINTFLAGS) $(DEFINES) $(LZCOMP_SRCS)
- lint $(LINTFLAGS) $(DEFINES) $(LZDCMP_SRCS)
-
- #
- # ** Remove unneeded files
- # ** (the object lists are named explicitly; there is no $(OBJS))
- #
- clean:
- 	rm -f $(LZCOMP_OBJS) $(LZDCMP_OBJS) lzcomp lzdcmp
-
- #
- # ** Rebuild the archive files
- # ** Uses the Decus C archive utility.
- #
- archive:
- cp Makefile makefile.txt
- archc lzcmp1.c lzcmp2.c lzcmp3.c >lz1.arc
- archc lzdcm1.c lzdcm2.c lzdcm3.c >lz2.arc
- archc lz.h lzio.c lzvio.c makefile.txt >lz3.arc
-
- #
- # Object module dependencies
- #
-
- lzcmp1.o : lzcmp1.c lz.h
-
- lzcmp2.o : lzcmp2.c lz.h
-
- lzcmp3.o : lzcmp3.c lz.h
-
- lzio.o : lzio.c lz.h
-
- lzdcm1.o : lzdcm1.c lz.h
-
- lzdcm2.o : lzdcm2.c lz.h
-
- lzdcm3.o : lzdcm3.c lz.h
-
-
- -h- lzcomp.mem Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZCOMP.MEM;2
-
-
-
-
- ____ ___________ 1 File Compression
-
-
-
- **********
- * lzcomp *
- **********
-
-
-
- NAME: lzcomp -- File Compression
-
- SYNOPSIS:
-
- lzcomp [-options] [infile [outfile]]
-
- DESCRIPTION:
-
- lzcomp implements the Lempel-Ziv file compression
- algorithm. (Files compressed by lzcomp are uncompressed
- by lzdcmp.) It operates by finding common substrings and
- replaces them with a variable-size code. This is
- deterministic, and can be done with a single pass over
- the file. Thus, the decompression procedure needs no
- input table, but can track the way the table was built.
-
- Options may be given in either case.
-
- -B Input file is "binary", not "human readable
- text". This is necessary on Dec operating
- systems, such as VMS and RSX-11M, that treat
- these files differently. (Note that binary
- support is rudimentary and probably insufficient
- as yet.) (On VMS version 4, this is ignored
- unless the -x option is specified or the input
- file is record-oriented.)
-
- -M bits Write using the specified number of bits in the
- code -- necessary for big machines making files
- for little machines. For example, if
- compressing a file on VMS which is to be read on
- a PDP-11, you should select -M 12.
-
- -V [n] Verbose if specified. If a value is specified,
- it will enable debugging code (if compiled in).
-
- -X [n] "Export" -- write a file format that can be read
- by other operating systems. Only the bytes in
- the file are copied; file attributes are not
- preserved. If specified, the value determines
- the level of compatibility. If not specified, or
- specified with an explicit value of zero, and
- lzcomp is running on Vax/VMS version 4 under
- VaxC and the input file is a disk or magtape
- Page 2
- lzcomp File Compression
-
-
- file (block-oriented), a VMS-private output
- format is used which is incompatible with the
- Unix compress utility, but which preserves VMS
- file attributes. -X may take on the following
- values:
-
- 0 Choose VMS private format. See restrictions
- below.
- 1 Compatible with Unix compress version 3.0:
- this is the default if -x is given without a
- value.
- 2 As above, but suppress "block compression"
- 3 Suppress block compression and do not output
- a compress header block. This is for
- compatibility with a quite early version of
- Unix compress (and requires
- conditional-compilation to use).
-
- Note that the -B (binary) option is ignored
- unless the input file is "record-oriented", such
- as a terminal or mailbox.
-
- The other two arguments are the input and output
- filenames respectively. Redirection is supported,
- however, the output must be a disk/tape file.
-
- The file format is almost identical to the current Unix
- implementation of compress (V4.0). Files written by
- Unix compress should be readable by lzdcmp. Files
- written by lzcomp in export (-x) format will be readable
- by Unix compress (except that lzcomp outputs two "clear"
- codes to mark EOF. A patch to Unix compress is
- available.)
-
- VMS RESTRICTIONS:
-
- VMS Private mode stores the true name and attributes of
- the input file into the compressed file and lzdcmp
- restores the attributes (and filename if requested).
- The following restrictions apply -- they may be lifted
- in the future as they are primarily due to the author's
- lack of understanding of the intricacies of VMS I/O:
-
- All files must be stored on disk.
- The lzcomp output file must be specified directly.
-
- Also, for all usage on VMS, the compressed file must be
- written to, and read from disk.
-
- LZW COMPRESSION ALGORITHM:
-
- This section is abstracted from Terry Welch's article
- referenced below. The algorithm builds a string
- translation table that maps substrings in the input into
- Page 3
- lzcomp File Compression
-
-
- fixed-length codes. The compress algorithm may be
- described as follows:
-
- 1. Initialize table to contain single-character
- strings.
- 2. Read the first character. Set <w> (the prefix
- string) to that character.
- 3. (step): Read next input character, K.
- 4. If at end of file, output code(<w>); exit.
- 5. If <w>K is in the string table:
- Set <w> to <w>K; goto step 3.
- 6. Else <w>K is not in the string table.
- Output code(<w>);
- Put <w>K into the string table;
- Set <w> to K; Goto step 3.
-
- "At each execution of the basic step an acceptable input
- string <w> has been parsed off. The next character K is
- read and the extended string <w>K is tested to see if it
- exists in the string table. If it is there, then the
- extended string becomes the parsed string <w> and the
- step is repeated. If <w>K is not in the string table,
- then it is entered, the code for the successfully parsed
- string <w> is put out as compressed data, the character K
- becomes the beginning of the next string, and the step
- is repeated."
-
- The decompression algorithm translates each received
- code into a prefix string and extension [suffix]
- character. The extension character is stored (in a
- push-down stack), and the prefix translated again, until
- the prefix is a single character, which completes
- decompression of this code. The entire code is then
- output by popping the stack.
-
- "An update to the string table is made for each code
- received (except the first one). When a code has been
- translated, its final character is used as the extension
- character, combined with the prior string, to add a new
- string to the string table. This new string is assigned
- a unique code value, which is the same code that the
- compressor assigned to that string. In this way, the
- decompressor incrementally reconstructs the same string
- table that the compressor used.... Unfortunately ...
- [the algorithm] does not work for an abnormal case.
-
- The abnormal case occurs whenever an input character
- string contains the sequence K<w>K<w>K, where K<w>
- already appears in the compressor string table."
-
- The decompression algorithm, augmented to handle the
- abnormal case, is as follows:
-
- 1. Read first input code;
- Page 4
- lzcomp File Compression
-
-
- Store in CODE and OLDcode;
- With CODE = code(K), output(K); FINchar = K;
- 2. Read next code to CODE; INcode = CODE;
- If at end of file, exit;
- 3. If CODE not in string table (special case) then
- Push FINchar onto the stack;
- CODE = OLDcode;
- INcode = code(OLDcode, FINchar);
-
- 4. If CODE == code(<w>K) then
- Push K onto the stack;
- CODE = code(<w>);
- Goto 4.
-
- 5. If CODE == code(K) then
- Output K;
- FINchar = K;
-
- 6. While stack not empty
- Output top of stack;
- Pop stack;
-
- 7. Put OLDcode,K into the string table.
- OLDcode = INcode;
- Goto 2.
-
- The algorithm as implemented here introduces two
- additional complications.
-
- The actual codes are transmitted using a variable-length
- encoding. The lowest-level routines increase the number
- of bits in the code when the largest possible code is
- transmitted.
-
- Periodically, the algorithm checks that compression is
- still increasing. If the ratio of input bytes to output
- bytes decreases, the entire process is reset. This can
- happen if the characteristics of the input file change.
-
- VMS PRIVATE FILE STRUCTURE:
-
- In VMS Private mode, the compressed data file contains a
- variable-length (but compressed) file header with the
- file "attributes" needed by the operating system to
- construct the file. This allows the decompression
- program to recreate the file in its original format,
- which is essential if ISAM databases are compressed.
-
- The overall file format is as follows:
-
- LZ_SOH "start of header" signal (this value cannot
- appear in user data).
-
- A variable-length data record (maximum 256
- Page 5
- lzcomp File Compression
-
-
- bytes) containing the header name, followed by
- whitespace, followed by header-specific
- information. In this case, the name record will
- contain the string "vms$attributes" followed by
- the number of bytes in the attribute data block.
- (I assume that the name record will consist of a
- facility name, such as "vms", followed by a
- dollar sign, followed by a facility-unique
- word.)
-
- LZ_EOR Signals "end of record".
-
- This is followed by a VMS file attributes record
- (generated by a VMS system library
- routine).
-
- LZ_ETX Signals "end of segment".
-
- LZ_STX Signals "start of text" (i.e., start of data
- file).
-
- This is followed by the user data file.
-
- LZ_ETX Signals "end of segment"
-
- LZ_ETX Two in a row signals "end of file".
-
- Note that this format can easily be extended to include
- trailer records (with file counts and checksums) and/or
- multiple data files in one compressed file.
-
- Note also that the LZ_CLEAR code may appear in headers
- or data files to cause the decompression program to
- "readapt" to the characteristics of the input data.
- LZ_STX and LZ_SOH reset the compression algorithm.
- LZ_EOR does not.
-
- AUTHORS:
-
- The algorithm is from "A Technique for High Performance
- Data Compression." Terry A. Welch. IEEE Computer Vol
- 17, No. 6 (June 1984), pp 8-19.
-
- This revision is by Martin Minow.
-
- Unix Compress authors are as follows:
-
- Spencer W. Thomas
- (decvax!harpo!utah-cs!utah-gr!thomas)
- Jim McKie (decvax!mcvax!jim)
- Steve Davies (decvax!vax135!petsd!peora!srd)
- Ken Turkowski (decvax!decwrl!turtlevax!ken)
- James A. Woods (decvax!ihnp4!ames!jaw)
- Joe Orost (decvax!vax135!petsd!joe)
-
- -h- lzdcmp.mem Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZDCMP.MEM;2
-
-
-
-
- ____ _____________ 1 File Decompression
-
-
-
- **********
- * lzdcmp *
- **********
-
-
-
- NAME: lzdcmp -- File Decompression
-
- SYNOPSIS:
-
- lzdcmp [-options] [infile [outfile]]
-
- DESCRIPTION:
-
- lzdcmp decompresses files compressed by lzcomp. The
- documentation for lzcomp describes the process in
- greater detail.
-
- Options may be given in either case.
-
- -B Output file is "binary", not text. (Ignored in
- VMS private mode.)
-
- -X 3 To read files compressed by an old Unix version
- that doesn't generate header records.
-
- -V val Verbose (print status messages and debugging
- information). The value selects the amount of
- verbosity.
-
- AUTHOR:
-
- This version by Martin Minow. See lzcomp for
- more details.
-
-